Load Data
analysis_data = readRDS(here("cleaned_data.rds")) %>% mutate_if(is.character, factor)
var_cats = read_csv(here("names_to_keep.csv"))
## Parsed with column specification:
## cols(
## name = col_double(),
## value = col_character(),
## keep = col_logical(),
## discrim = col_logical(),
## outcome = col_logical()
## )
demo_names = filter(var_cats, discrim)$value %>% .[. %in% names(analysis_data)]
outcome_names = filter(var_cats, outcome)$value %>% .[. %in% names(analysis_data)]
no_demo_data = analysis_data %>% select(-all_of(demo_names))
outcome_data = analysis_data %>% select(all_of(outcome_names))
Descriptive Statistics
descr_stats = analysis_data %>% skim() %>% partition()
# descr_stats$logical %>% print_kable("Binary Variables")
descr_stats$factor %>% mutate_if(is.numeric, format_number) %>% print_kable("Factor Variables")
Factor Variables
|
skim_variable
|
n_missing
|
complete_rate
|
ordered
|
n_unique
|
top_counts
|
|
YEARSEQ
|
0
|
1
|
FALSE
|
118,556
|
-94: 1, 000: 1, 000: 1, 000: 1
|
|
STATE
|
0
|
1
|
FALSE
|
25
|
CA: 29564, NY: 17167, FL: 12520, TX: 8965
|
|
COUNTY
|
0
|
1
|
FALSE
|
65
|
Los: 10419, Coo: 5738, Dad: 4355, Mar: 4245
|
|
TOTCHGS1
|
46
|
1
|
FALSE
|
8
|
One: 51752, Two: 32156, Thr: 16085, Fou: 8376
|
|
OFFENSE1
|
0
|
1
|
FALSE
|
16
|
Oth: 20746, Dru: 20534, Ass: 14470, Lar: 11178
|
|
OFFTYPE1
|
0
|
1
|
FALSE
|
4
|
Dru: 41280, Pro: 37280, Vio: 29623, Pub: 10373
|
|
CHG1ATT
|
1,425
|
0.988
|
FALSE
|
2
|
No: 113998, Yes: 3133
|
|
OFFENSE2
|
111
|
0.999
|
FALSE
|
19
|
Not: 51752, Mis: 20818, Oth: 8221, Dru: 5503
|
|
OFFTYPE2
|
111
|
0.999
|
FALSE
|
7
|
Not: 51752, Mis: 20818, Pro: 14948, Dru: 13724
|
|
CHG2ATT
|
2,457
|
0.979
|
FALSE
|
3
|
No: 62783, Not: 51752, Yes: 1564
|
|
GENDER
|
358
|
0.997
|
FALSE
|
2
|
Mal: 98324, Fem: 19874
|
|
CJSTATUS
|
14,503
|
0.878
|
FALSE
|
2
|
Non: 66335, Act: 37718
|
|
CJRELPTR
|
18,700
|
0.842
|
FALSE
|
2
|
No: 85820, Yes: 14036
|
|
CJRELPRO
|
15,311
|
0.871
|
FALSE
|
2
|
No: 86021, Yes: 17224
|
|
CJRELPAR
|
14,503
|
0.878
|
FALSE
|
2
|
No: 97902, Yes: 6151
|
|
CJRELCUS
|
14,503
|
0.878
|
FALSE
|
2
|
No: 102465, Yes: 1588
|
|
CJRELDIV
|
22,431
|
0.811
|
FALSE
|
2
|
No: 95801, Yes: 324
|
|
CJRELFUG
|
22,431
|
0.811
|
FALSE
|
2
|
No: 93719, Yes: 2406
|
|
SERARR
|
9,987
|
0.916
|
FALSE
|
3
|
Fel: 64807, No : 30352, Mis: 13410
|
|
PRIARR
|
14,593
|
0.877
|
FALSE
|
10
|
0: 30352, 10 : 28036, 1: 9864, 2: 8165
|
|
PRIFARR
|
13,454
|
0.887
|
FALSE
|
10
|
0: 43799, 1: 13068, 10 : 12238, 2: 10071
|
|
PRIMARR
|
13,769
|
0.884
|
FALSE
|
10
|
0: 42490, 10 : 13712, 1: 13613, 2: 9852
|
|
PRIORFTA
|
10,220
|
0.914
|
FALSE
|
3
|
No: 37805, Not: 37117, Yes: 33414
|
|
SERCONV
|
7,076
|
0.94
|
FALSE
|
3
|
No : 46951, Fel: 44821, Mis: 19708
|
|
PRICONV
|
10,678
|
0.91
|
FALSE
|
10
|
0: 46951, 1: 14940, 2: 10471, 10 : 8962
|
|
PRIFCONV
|
8,455
|
0.929
|
FALSE
|
10
|
0: 66818, 1: 16698, 2: 9915, 3: 6053
|
|
PRIMCONV
|
9,482
|
0.92
|
FALSE
|
10
|
0: 59484, 1: 16358, 2: 10169, 3: 6456
|
|
PRIVCONV
|
10,154
|
0.914
|
FALSE
|
3
|
Not: 58126, No: 38998, Yes: 11278
|
|
PRIPRIS
|
15,673
|
0.868
|
FALSE
|
10
|
0: 81920, 1: 10278, 2: 5219, 3: 2663
|
|
PRIJAIL
|
14,190
|
0.88
|
FALSE
|
10
|
0: 63117, 1: 15344, 2: 8738, 3: 5370
|
|
RELDET1
|
3,530
|
0.97
|
FALSE
|
3
|
Rel: 70586, Det: 42618, Cas: 1822
|
|
RELDET2
|
3,530
|
0.97
|
FALSE
|
8
|
Non: 34501, Hel: 33983, Fin: 31828, Den: 7011
|
|
PTRMIS
|
297
|
0.997
|
FALSE
|
3
|
No: 47933, Not: 47673, Yes: 22653
|
|
FTA1
|
1,535
|
0.987
|
FALSE
|
3
|
No,: 54220, Not: 47218, Yes: 15583
|
|
FTA3
|
1,559
|
0.987
|
FALSE
|
4
|
Mad: 54219, Not: 47218, FTA: 11232, FTA: 4328
|
|
REGION
|
263
|
0.998
|
FALSE
|
4
|
Wes: 39890, Nor: 31666, Sou: 29055, Mid: 17682
|
|
APPTYP
|
0
|
1
|
FALSE
|
4
|
Non: 51307, Par: 44858, Gov: 21233, Leg: 1158
|
|
APPJUD
|
0
|
1
|
FALSE
|
2
|
Ele: 96165, App: 22391
|
|
RELPOS
|
0
|
1
|
FALSE
|
2
|
Yes: 114173, No: 4383
|
|
NOSCREEN
|
0
|
1
|
FALSE
|
2
|
Scr: 79822, No : 38734
|
|
PTRPRG89
|
0
|
1
|
FALSE
|
2
|
Pro: 79228, No : 39328
|
|
PTRPRG01
|
0
|
1
|
FALSE
|
2
|
Pro: 101252, No : 17304
|
|
PRSCOMP
|
23,189
|
0.804
|
FALSE
|
2
|
No: 66727, Yes: 28640, Bla: 0
|
|
WHITE
|
0
|
1
|
FALSE
|
2
|
No: 91025, Yes: 27531
|
|
BLACK
|
0
|
1
|
FALSE
|
2
|
No: 74955, Yes: 43601
|
|
OTHER
|
0
|
1
|
FALSE
|
2
|
No: 116212, Yes: 2344
|
|
HISP
|
0
|
1
|
FALSE
|
2
|
No: 93259, Yes: 25297
|
|
AGED
|
1,339
|
0.989
|
FALSE
|
8
|
25-: 20930, 21-: 19892, 30-: 18542, 18-: 18310
|
|
JUVSTAT
|
0
|
1
|
FALSE
|
2
|
Adu: 114667, Juv: 3889
|
|
FINREL
|
10,548
|
0.911
|
FALSE
|
2
|
Not: 76180, Mad: 31828, Bla: 0
|
descr_stats$numeric %>% mutate_if(is.numeric, format_number) %>% print_kable("Continuous Variables")
Continuous Variables
|
skim_variable
|
n_missing
|
complete_rate
|
mean
|
sd
|
p0
|
p25
|
p50
|
p75
|
p100
|
hist
|
|
YEAR
|
0
|
1
|
1,997
|
4.54
|
1,990
|
1,994
|
1,998
|
2,002
|
2,004
|
▇▃▇▃▇
|
|
AGE
|
1,341
|
0.989
|
30
|
9.98
|
12
|
22
|
28
|
36
|
90
|
▇▇▂▁▁
|
|
BAILAMT
|
54,656
|
0.539
|
36,094
|
157,465
|
1
|
3,000
|
10,000
|
25,000
|
10,000,000
|
▇▁▁▁▁
|
|
POPULAT
|
0
|
1
|
2,324,192
|
2,408,543
|
585,000
|
866,000
|
1,472,000
|
2,347,000
|
9,917,300
|
▇▁▁▁▁
|
|
POPU2
|
0
|
1
|
2,281,627
|
2,379,665
|
79,000
|
858,000
|
1,446,200
|
2,301,000
|
9,763,800
|
▇▂▁▁▁
|
|
CASES
|
0
|
1
|
1,909
|
1,277
|
240
|
1,082
|
1,572
|
2,231
|
6,400
|
▇▆▁▁▁
|
|
CASRAT
|
0
|
1
|
0.001
|
0.001
|
0
|
0.001
|
0.001
|
0.001
|
0.007
|
▇▁▁▁▁
|
|
UCRINDX
|
0
|
1
|
145,509
|
154,305
|
2,346
|
57,126
|
79,285
|
184,270
|
711,295
|
▇▂▁▁▁
|
|
UCRINDX2
|
0
|
1
|
154,654
|
164,141
|
2,815
|
58,167
|
83,402
|
187,048
|
719,401
|
▇▂▁▁▁
|
|
CRMRT
|
0
|
1
|
0.067
|
0.037
|
0.003
|
0.043
|
0.063
|
0.078
|
0.309
|
▇▆▁▁▁
|
|
CRMRT2
|
0
|
1
|
0.074
|
0.058
|
0.003
|
0.047
|
0.067
|
0.084
|
0.766
|
▇▁▁▁▁
|
|
CRMRTCHG
|
0
|
1
|
-0.007
|
0.044
|
-0.696
|
-0.01
|
-0.004
|
0.001
|
0.149
|
▁▁▁▁▇
|
|
DELCRMRT
|
0
|
1
|
-3.687
|
40.4
|
-90.863
|
-14.157
|
-6.487
|
1.51
|
788
|
▇▁▁▁▁
|
|
ADP
|
1,128
|
0.99
|
173
|
85
|
2
|
102
|
187
|
254
|
280
|
▃▃▃▅▇
|
|
ADP2
|
1,128
|
0.99
|
175
|
85.2
|
2
|
106
|
191
|
256
|
283
|
▃▃▃▅▇
|
|
RATCAP
|
1,128
|
0.99
|
156
|
76.8
|
2
|
91
|
172
|
230
|
253
|
▃▃▃▅▇
|
|
RATCAP2
|
1,128
|
0.99
|
159
|
76.9
|
2
|
92
|
175
|
231
|
257
|
▃▃▃▅▇
|
|
JALAVAL
|
1,128
|
0.99
|
146
|
85
|
2
|
76
|
143
|
219
|
292
|
▇▇▇▇▇
|
|
JALAVAL2
|
1,128
|
0.99
|
146
|
83.8
|
2
|
78
|
140
|
220
|
290
|
▇▇▇▆▇
|
|
DELTJLAVL
|
1,128
|
0.99
|
146
|
81.5
|
1
|
74
|
147
|
215
|
292
|
▆▇▆▇▆
|
|
WHTP
|
0
|
1
|
1,075,965
|
875,226
|
107,284
|
535,609
|
750,341
|
1,261,087
|
3,610,800
|
▇▃▁▁▂
|
|
WHTP2
|
13,537
|
0.886
|
154
|
86.2
|
2
|
76
|
155
|
239
|
278
|
▅▅▅▅▇
|
|
BLACKP
|
0
|
1
|
398,736
|
378,252
|
6,018
|
111,696
|
218,653
|
577,677
|
1,423,731
|
▇▃▁▂▁
|
|
BLACKP2
|
0
|
1
|
393,123
|
378,608
|
5,694
|
105,137
|
212,971
|
568,349
|
1,423,731
|
▇▃▁▂▁
|
|
ASIANP
|
0
|
1
|
196,116
|
320,307
|
3,663
|
22,984
|
62,447
|
210,312
|
1,321,268
|
▇▁▁▁▁
|
|
HISPP
|
0
|
1
|
664,536
|
1,082,081
|
3,227
|
63,668
|
239,483
|
734,569
|
4,610,627
|
▇▁▁▁▁
|
|
HISPP2
|
13,537
|
0.886
|
164
|
83.4
|
2
|
95
|
175
|
239
|
278
|
▃▃▅▅▇
|
|
PCTBLCK
|
0
|
1
|
0.2
|
0.146
|
0.008
|
0.095
|
0.169
|
0.264
|
0.666
|
▇▆▁▃▁
|
|
PCTHSP
|
0
|
1
|
0.203
|
0.158
|
0.005
|
0.064
|
0.174
|
0.29
|
0.807
|
▇▆▂▁▁
|
|
PCTBLCK2
|
0
|
1
|
0.199
|
0.145
|
0.008
|
0.095
|
0.164
|
0.263
|
0.669
|
▇▆▁▃▁
|
|
PCTHSP2
|
13,537
|
0.886
|
149
|
77.1
|
2
|
82.5
|
155
|
221
|
263
|
▅▅▆▆▇
|
|
PCTBCHG
|
0
|
1
|
0.001
|
0.023
|
-0.355
|
-0.001
|
0.001
|
0.005
|
0.085
|
▁▁▁▁▇
|
|
PCTHCHG
|
13,537
|
0.886
|
152
|
77.6
|
2
|
93
|
158
|
219
|
279
|
▆▅▇▇▇
|
|
DELTBPOP
|
0
|
1
|
1.25
|
10.3
|
-91.54
|
-0.593
|
1.05
|
3.01
|
124
|
▁▁▇▁▁
|
|
DELTHPOP
|
13,537
|
0.886
|
130
|
80.2
|
1
|
62
|
122
|
197
|
279
|
▇▇▇▆▆
|
|
EXPND
|
19,965
|
0.832
|
52,260,613
|
68,656,458
|
439,535
|
13,572,816
|
28,200,000
|
50,549,963
|
373,000,000
|
▇▁▁▁▁
|
|
FULEMP
|
19,493
|
0.836
|
99.9
|
50.1
|
2
|
57
|
103
|
146
|
171
|
▃▅▅▆▇
|
|
PRTEMP
|
19,493
|
0.836
|
14.4
|
15.8
|
2
|
2
|
5
|
26
|
51
|
▇▁▂▁▁
|
|
POLCRT
|
0
|
1
|
0.286
|
0.479
|
0
|
0.05
|
0.18
|
0.44
|
8.96
|
▇▁▁▁▁
|
|
POLJAL
|
0
|
1
|
0.882
|
1.19
|
0
|
0
|
0.37
|
1.27
|
9.96
|
▇▁▁▁▁
|
|
POLFT_PT
|
0
|
1
|
31.1
|
12.5
|
9.23
|
22.3
|
28.1
|
35.9
|
90.5
|
▅▇▁▁▁
|
|
POLTOT
|
31,534
|
0.734
|
135
|
66.4
|
2
|
79
|
145
|
202
|
219
|
▃▃▃▃▇
|
|
POLRTC
|
31,534
|
0.734
|
127
|
68.5
|
2
|
70
|
126
|
191
|
237
|
▆▅▇▆▇
|
|
POLRT
|
8,856
|
0.925
|
22.3
|
13.9
|
2
|
8
|
21
|
33
|
51
|
▇▇▅▅▃
|
|
DRGPOL
|
8,856
|
0.925
|
21.8
|
10.2
|
2
|
14
|
23
|
30
|
38
|
▆▆▅▇▇
|
|
PRCP
|
19,965
|
0.832
|
145
|
72.4
|
2
|
85
|
154
|
209
|
257
|
▅▅▆▇▇
|
|
PRCPEMP
|
19,493
|
0.836
|
145
|
71.9
|
2
|
90
|
154
|
205
|
259
|
▅▆▆▇▇
|
|
BLXDLTB
|
0
|
1
|
0.561
|
5.67
|
-91.54
|
0
|
0
|
0
|
124
|
▁▁▇▁▁
|
|
HPXDLTH
|
2,584
|
0.978
|
27.8
|
42.1
|
1
|
13
|
13
|
13
|
257
|
▇▁▁▁▁
|
|
BLXPCTB
|
0
|
1
|
0.092
|
0.15
|
0
|
0
|
0
|
0.169
|
0.666
|
▇▂▁▁▁
|
|
HPXPCTH
|
0
|
1
|
0.066
|
0.146
|
0
|
0
|
0
|
0
|
0.807
|
▇▁▁▁▁
|
|
PCTBLKSQ
|
0
|
1
|
0.061
|
0.081
|
0
|
0.009
|
0.029
|
0.07
|
0.443
|
▇▁▁▁▁
|
|
PCTHSPSQ
|
0
|
1
|
0.066
|
0.092
|
0
|
0.004
|
0.03
|
0.084
|
0.651
|
▇▂▁▁▁
|
|
BAILLN
|
54,656
|
0.539
|
406
|
222
|
2
|
217
|
429
|
574
|
1,000
|
▆▇▇▅▁
|
|
PBLKLN
|
0
|
1
|
-1.944
|
0.904
|
-4.793
|
-2.349
|
-1.777
|
-1.332
|
-0.407
|
▁▂▃▇▃
|
|
PHSPLN
|
0
|
1
|
-2.054
|
1.14
|
-5.316
|
-2.747
|
-1.75
|
-1.237
|
-0.214
|
▁▂▃▇▅
|
|
PRCPLN
|
19,965
|
0.832
|
144
|
72
|
2
|
85
|
153
|
208
|
256
|
▅▅▆▇▇
|
|
JLAVLLN
|
1,128
|
0.99
|
144
|
84.1
|
2
|
74
|
140
|
216
|
289
|
▇▇▇▇▇
|
|
CRMRTLN
|
0
|
1
|
-2.834
|
0.514
|
-5.928
|
-3.136
|
-2.771
|
-2.554
|
-1.174
|
▁▁▃▇▁
|
|
CASRATLN
|
0
|
1
|
-6.932
|
0.547
|
-8.625
|
-7.35
|
-6.926
|
-6.535
|
-4.903
|
▁▆▇▂▁
|
|
AGESQ
|
1,339
|
0.989
|
20
|
9.99
|
2
|
12
|
18
|
26
|
79
|
▇▇▂▁▁
|
Correlation Matrix
analysis_data %>% mutate_all(as.numeric) %>% cor(use = "pairwise.complete.obs") %>% ggcorrplot(p.mat = cor_pmat(.), hc.order = TRUE)

# p.mat = cor_pmat(.), hc.order = TRUE
Flight Risk Accuracy
Here we are trying to estimate the “accuracy” of those determining whether a person charged with a crime is a flight risk, and therefore should be detained until their trial. While we obviously cannot know whether someone who was detained until trial would have returned for their trial or not from this data, we can attempt to measure this from those who were released after paying their bail. Someone who was released on bail would likely have been considered a significantly greater flight risk than a person released with no bail. We can attempt to measure the amount of false positives by looking at people who were deemed a high enough flight risk for bail, but still returned. Those who were released on bail and did not return are considered to be “correctly” classified here. If someone was released and returned then they were correctly classified, while those who were released without bail and did not return would be considered a false negative. This is not a perfect measurement of classification accuracy, since bail is a variable amount, and is different than being released even if a high flight risk with no collateral, but should be close enough to determine how accuracte and equitable humans are in this process.
tibble(" " = c("Paid Bail", "No Bail"), "Appeared at Court" = c("False Positive", "Correct"), "Did Not Appear" = c("Correct", "False Negative")) %>% print_kable()
|
|
Appeared at Court
|
Did Not Appear
|
|
Paid Bail
|
False Positive
|
Correct
|
|
No Bail
|
Correct
|
False Negative
|
accuracy_data = analysis_data %>%
filter(RELDET1 == "Released") %>%
filter(!is.na(FTA1)) %>%
mutate(no_bail = (FINREL == "Not made bail"),
fta = FTA1 == "Yes, FTA",
pred_acc = ifelse(fta,
ifelse(no_bail, "False Negative", "Correct"),
ifelse(no_bail, "Correct", "False Positive")),
race = ifelse(WHITE == "Yes", "White", ifelse(BLACK == "Yes", "Black", ifelse(HISP == "Yes", "Latinx", "Other"))))
# accuracy_data %>% select(FINREL, no_bail, fta, FTA1, pred_acc) %>% distinct() %>% View()
accuracy_data %>% count(pred_acc) %>% na.omit() %>% mutate(percent = scales::percent(n / sum(n), accuracy = 0.1)) %>% select(-n) %>% rename("Prediction Accuracy" = pred_acc) %>% print_kable("Overall Accuracy")
Overall Accuracy
|
Prediction Accuracy
|
percent
|
|
Correct
|
47.8%
|
|
False Negative
|
13.4%
|
|
False Positive
|
38.9%
|
accuracy_data %>% count(GENDER, pred_acc) %>% na.omit() %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Gender" = GENDER) %>% print_kable("Accuracy by Gender")
## Warning: Factor `GENDER` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Accuracy by Gender
|
Gender
|
Correct
|
False Negative
|
False Positive
|
|
Male
|
46.8%
|
13.4%
|
39.8%
|
|
Female
|
51.2%
|
13.3%
|
35.5%
|
accuracy_data %>% count(AGED, pred_acc) %>% na.omit() %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Age Category" = AGED) %>% print_kable("Accuracy by Age Category")
## Warning: Factor `AGED` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Accuracy by Age Category
|
Age Category
|
Correct
|
False Negative
|
False Positive
|
|
1-17
|
56%
|
14.4%
|
29.6%
|
|
18-20
|
48.9%
|
12.4%
|
38.8%
|
|
21-24
|
46.1%
|
12.7%
|
41.2%
|
|
25-29
|
46.7%
|
14%
|
39.3%
|
|
30-34
|
47%
|
14.4%
|
38.5%
|
|
35-39
|
47.1%
|
14.5%
|
38.4%
|
|
40-49
|
47.7%
|
13.7%
|
38.6%
|
|
50+
|
49.4%
|
8.6%
|
42%
|
accuracy_data %>% count(race, pred_acc) %>% na.omit() %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Race" = race) %>% print_kable("Accuracy by Race")
Accuracy by Race
|
Race
|
Correct
|
False Negative
|
False Positive
|
|
Black
|
47.4%
|
14.9%
|
37.7%
|
|
Latinx
|
48.8%
|
15.5%
|
35.7%
|
|
Other
|
48.1%
|
12.5%
|
39.4%
|
|
White
|
47.2%
|
10.5%
|
42.3%
|
# accuracy_data %>% count(c_charge_degree, pred_acc) %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Charge Degree" = c_charge_degree) %>% print_kable("COMPAS Accuracy by Charge Degree")
#
# collapse_by = function(x, collapse_func = quantile, inclusive = TRUE) {
# raw_values = collapse_func(x)
# out = character()
# if (inclusive) {
# indices = 1:(length(raw_values) - 1)
# for (val_index in indices) {
# out[x >= raw_values[val_index] & x <= raw_values[val_index + 1]] = ifelse(val_index == length(indices), paste("<=", (raw_values[val_index + 1])), paste("<", (raw_values[val_index + 1])))
# }
# }
# factor(out)
# }
#
# accuracy_data %>% mutate(priors_count = collapse_by(priors_count)) %>% count(priors_count, pred_acc) %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Priors Count" = priors_count) %>% print_kable("COMPAS Accuracy by Priors Count")
#
# accuracy_data %>% filter(!is.na(offense_year)) %>% count(offense_year, pred_acc) %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Year" = offense_year) %>% print_kable("COMPAS Accuracy by Year of Offense")
#
# accuracy_data %>% filter(!is.na(offense_month)) %>% count(offense_month, pred_acc) %>% crosstab_percent(formatted = TRUE) %>% select(-n) %>% pivot_wider(names_from = pred_acc, values_from = percent) %>% rename("Month" = offense_month) %>% print_kable("COMPAS Accuracy by Month of Offense")
Equity Tables
equity_table = function(data_set, demo_var, recid_var, demo_label = str_to_sentence(demo_var), recid_label = "Rate", format_perc = TRUE) {
data_set %>%
count(!!sym(demo_var), !!sym(recid_var)) %>%
na.omit() %>%
crosstab_percent(vars = demo_var, formatted = format_perc) %>%
select(-n) %>%
pivot_wider(names_from = recid_var, values_from = "percent") %>%
select(-`FALSE`) %>% (function(x) {names(x) = c(demo_label, recid_label); x})
}
equity_data = analysis_data %>%
mutate(no_bail = (FINREL == "Not made bail"),
fta = FTA1 == "Yes, FTA",
race = ifelse(WHITE == "Yes", "White", ifelse(BLACK == "Yes", "Black", ifelse(HISP == "Yes", "Latinx", "Other"))))
Release Rates
equity_data %>%
mutate(RELDET1 = RELDET1 == "Released") %>%
count(RELDET1) %>%
na.omit() %>%
mutate(Percent = scales::percent(n / sum(n), accuracy = 0.1), RELDET1 = ifelse(RELDET1, "Yes", "No")) %>%
rename("Released" = RELDET1) %>%
select(-n) %>%
print_kable("Overall Release Rate")
Overall Release Rate
|
Released
|
Percent
|
|
No
|
38.6%
|
|
Yes
|
61.4%
|
equity_data %>%
mutate(RELDET1 = RELDET1 == "Released") %>%
equity_table("GENDER", recid_var = "RELDET1") %>%
print_kable("Release Rate by Gender")
## Warning: Factor `GENDER` contains implicit NA, consider using
## `forcats::fct_explicit_na`
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(recid_var)` instead of `recid_var` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
Release Rate by Gender
|
Gender
|
Rate
|
|
Male
|
58.9%
|
|
Female
|
73%
|
equity_data %>%
mutate(RELDET1 = RELDET1 == "Released") %>%
equity_table("AGED", recid_var = "RELDET1", demo_label = "Age Group") %>%
print_kable("Release Rate by Age Group")
## Warning: Factor `AGED` contains implicit NA, consider using
## `forcats::fct_explicit_na`
Release Rate by Age Group
|
Age Group
|
Rate
|
|
1-17
|
68.9%
|
|
18-20
|
66.9%
|
|
21-24
|
61.3%
|
|
25-29
|
60.1%
|
|
30-34
|
58.3%
|
|
35-39
|
58.4%
|
|
40-49
|
58.9%
|
|
50+
|
67.5%
|
equity_data %>%
mutate(RELDET1 = RELDET1 == "Released") %>%
equity_table("race", recid_var = "RELDET1") %>%
print_kable("Release Rate by Race")
Release Rate by Race
|
Race
|
Rate
|
|
Black
|
60.4%
|
|
Latinx
|
53.9%
|
|
Other
|
64.8%
|
|
White
|
67.1%
|
Bail Rates
bail_data = equity_data %>% filter(RELDET2 == "Financial release" | RELDET2 == "Held on bail" | RELDET2 == "Denied bail") %>% mutate(offered_bail = RELDET2 != "Denied bail")
On average, 90.4% of people charged with a crime and were not released, were offered to be released on bail.
bail_data %>% filter(!is.na(GENDER)) %>% group_by(GENDER) %>% summarise("Rate" = mean(offered_bail) %>% scales::percent(accuracy = 0.1)) %>% rename("Gender" = GENDER) %>% print_kable("Bail Rate by Gender")
Bail Rate by Gender
|
Gender
|
Rate
|
|
Male
|
89.9%
|
|
Female
|
93.2%
|
bail_data %>% filter(!is.na(AGED)) %>% group_by(AGED) %>% summarise("Rate" = mean(offered_bail) %>% scales::percent(accuracy = 0.1)) %>% rename("Age Category" = AGED) %>% print_kable("Bail Rate by Age Category")
Bail Rate by Age Category
|
Age Category
|
Rate
|
|
1-17
|
87.6%
|
|
18-20
|
91.3%
|
|
21-24
|
90.6%
|
|
25-29
|
89.6%
|
|
30-34
|
89.5%
|
|
35-39
|
90.4%
|
|
40-49
|
90.9%
|
|
50+
|
92.0%
|
bail_data %>% filter(!is.na(race)) %>% group_by(race) %>% summarise("Rate" = mean(offered_bail) %>% scales::percent(accuracy = 0.1)) %>% rename("Race" = race) %>% print_kable("Bail Rate by Race")
Bail Rate by Race
|
Race
|
Rate
|
|
Black
|
88.9%
|
|
Latinx
|
90.4%
|
|
Other
|
92.5%
|
|
White
|
91.0%
|
Bail Amount
bail_amt_data = equity_data %>% filter(!(is.na(BAILAMT)))
The median bail amount overall is $10,000.
bail_amt_data %>% select(GENDER, BAILAMT) %>% na.omit() %>% group_by(GENDER) %>% summarise(BAILAMT = median(BAILAMT)) %>% rename("Gender" = GENDER, "Median Bail" = BAILAMT) %>% mutate_if(is.numeric, format_number) %>% print_kable("Median Bail Amount by Gender")
Median Bail Amount by Gender
|
Gender
|
Median Bail
|
|
Male
|
10,000
|
|
Female
|
5,000
|
bail_amt_data %>% select(AGED, BAILAMT) %>% na.omit() %>% group_by(AGED) %>% summarise(BAILAMT = median(BAILAMT)) %>% rename("Age Category" = AGED, "Median Bail" = BAILAMT) %>% mutate_if(is.numeric, format_number) %>% print_kable("Median Bail Amount by Age Category")
Median Bail Amount by Age Category
|
Age Category
|
Median Bail
|
|
1-17
|
5,000
|
|
18-20
|
7,500
|
|
21-24
|
10,000
|
|
25-29
|
10,000
|
|
30-34
|
10,000
|
|
35-39
|
10,000
|
|
40-49
|
10,000
|
|
50+
|
10,000
|
bail_amt_data %>% select(race, BAILAMT) %>% na.omit() %>% group_by(race) %>% summarise(BAILAMT = median(BAILAMT)) %>% rename("Race" = race, "Median Bail" = BAILAMT) %>% mutate_if(is.numeric, format_number) %>% print_kable("Median Bail Amount by Race")
Median Bail Amount by Race
|
Race
|
Median Bail
|
|
Black
|
7,500
|
|
Latinx
|
10,000
|
|
Other
|
8,000
|
|
White
|
7,500
|
Paid Bail Rates
paid_bail_data = bail_amt_data %>% mutate(paid_bail = FINREL == "Made bail") %>% filter(!is.na(paid_bail))
On average, 48.4% of people charged with a crime were able to pay their bail.
paid_bail_data %>% filter(!is.na(GENDER)) %>% group_by(GENDER) %>% summarise("Rate" = mean(paid_bail) %>% scales::percent(accuracy = 0.1)) %>% rename("Gender" = GENDER) %>% print_kable("Paid Bail Rate by Gender")
Paid Bail Rate by Gender
|
Gender
|
Rate
|
|
Male
|
46.4%
|
|
Female
|
59.9%
|
paid_bail_data %>% filter(!is.na(AGED)) %>% group_by(AGED) %>% summarise("Rate" = mean(paid_bail) %>% scales::percent(accuracy = 0.1)) %>% rename("Age Category" = AGED) %>% print_kable("Paid Bail Rate by Age Category")
Paid Bail Rate by Age Category
|
Age Category
|
Rate
|
|
1-17
|
49.7%
|
|
18-20
|
54.2%
|
|
21-24
|
50.1%
|
|
25-29
|
48.0%
|
|
30-34
|
45.3%
|
|
35-39
|
44.9%
|
|
40-49
|
44.9%
|
|
50+
|
55.8%
|
paid_bail_data %>% filter(!is.na(race)) %>% group_by(race) %>% summarise("Rate" = mean(paid_bail) %>% scales::percent(accuracy = 0.1)) %>% rename("Race" = race) %>% print_kable("Paid Bail Rate by Race")
Paid Bail Rate by Race
|
Race
|
Rate
|
|
Black
|
47.9%
|
|
Latinx
|
37.3%
|
|
Other
|
51.4%
|
|
White
|
57.2%
|